import requests
from bs4 import BeautifulSoup

# Download the demo page and parse it. bs4 (BeautifulSoup) is the library used
# throughout this script to parse and navigate HTML content.
# The timeout keeps the script from hanging forever on an unresponsive server,
# and raise_for_status() stops early instead of parsing an HTTP error page.
r = requests.get('http://python123.io/ws/demo.html', timeout=10)
r.raise_for_status()
demo = r.text  # decoded text of the response body
soup = BeautifulSoup(demo, 'lxml')

# print(soup.prettify())

# Basic element types in BeautifulSoup:
#   Tag             - e.g. soup.title, soup.a
#   Name            - tag.name
#   Attributes      - tag.attrs
#   NavigableString - tag.string
#   Comment
#
# ## Getting a Tag
# print(soup.title)
# print(soup.a)  # returns only the first <a> tag
#
# ## Getting a tag's name
# print(soup.a.name)
# print(soup.a.parent.name)
# print(soup.a.parent.parent.name)
#
# ## Getting a tag's attributes
tag = soup.a
# print(tag.attrs)  # the tag's attributes
# print(tag.attrs['href'])
# print(soup.p.attrs)
#
# ## The .string attribute (NavigableString / Comment)
print(type(tag.string))
# print(soup.p.string)

# Downward traversal of the tag tree:
#   .contents    - list of a tag's child nodes
#   .children    - iterator over a tag's child nodes
#   .descendants - iterator over all descendant nodes, for use in loops
print(soup.head.contents)

# Look up the body's children once and reuse the list for both prints.
body_children = soup.body.contents
print(body_children)
print(len(body_children))

# Upward traversal of the tag tree.
#
# .parents is a generator, so printing it directly only shows the generator
# object's repr (with a memory address); print the ancestors' names instead.
print([ancestor.name for ancestor in soup.html.parents])

# Walk from the first <a> tag up to the document root. .parents stops at the
# top-level document object and never yields None, so no None check is needed.
for parent in soup.a.parents:
    print(parent.name)

# Sibling (sideways) traversal of the tag tree, starting from the first <a> tag.
first_link = soup.a
following = first_link.next_sibling
print(following)
print(following.next_sibling)
print(first_link.previous_sibling)

# Pretty-printing the parsed HTML.
print(soup.prettify())